Name: Aaron Szabo
Last Editted: 12/15/19
Class: CMSC320
import pandas as pd
import geopandas as gpd
import folium
import shapely
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import math
import matplotlib.pyplot as plt
redline_data = gpd.read_file("MDBaltimore1937.geojson")
data = gpd.read_file("mygeodata/tl_2010_24510_bg10.geojson")
housing_data = pd.read_csv("2011_Housing_Market_Typology.csv")
def redlining_colormap(holc_grade):
if holc_grade == 'A':
return "#00ff00"
elif holc_grade == 'B':
return "#0000ff"
elif holc_grade == 'C':
return "#ffff00"
elif holc_grade == 'D':
return "#ff0000"
else:
return "#ffffff"
redline_data['color'] = redline_data['holc_grade'].apply(redlining_colormap)
perc_green = []
perc_blue = []
perc_yellow = []
perc_red = []
for index1,c_row in data.iterrows():
c_district = c_row['geometry']
c_area = c_district.area
is_green = False
is_blue = False
is_yellow = False
is_red = False
for index2,r_row in redline_data.iterrows():
r_area = r_row['geometry']
if c_district.intersects(r_area):
overlap_area = c_district.intersection(r_area).area
if (overlap_area > 0):
if r_row['holc_grade'] == 'A':
if is_green:
perc_green[index1] += overlap_area/c_area
else:
is_green = True
perc_green.append(overlap_area/c_area)
elif r_row['holc_grade'] == 'B':
if is_blue:
perc_blue[index1] += overlap_area/c_area
else:
is_blue = True
perc_blue.append(overlap_area/c_area)
elif r_row['holc_grade'] == 'C':
if is_yellow:
perc_yellow[index1] += overlap_area/c_area
else:
is_yellow = True
perc_yellow.append(overlap_area/c_area)
elif r_row['holc_grade'] == 'D':
if is_red:
perc_red[index1] += overlap_area/c_area
else:
is_red = True
perc_red.append(overlap_area/c_area)
if not is_green:
perc_green.append(0.0)
if not is_blue:
perc_blue.append(0.0)
if not is_yellow:
perc_yellow.append(0.0)
if not is_red:
perc_red.append(0.0)
data['perc_green'] = perc_green
data['perc_blue'] = perc_blue
data['perc_yellow'] = perc_yellow
data['perc_red'] = perc_red
housing_data['blockGroup'] = housing_data['blockGroup'].apply(lambda x: str(x) if (len(str(x)) == 7) else ('0'+str(x)))
missing = []
for index,d_row in data.iterrows():
found = False
for index2,h_row in housing_data.iterrows():
if ('24510' + h_row['blockGroup']) == d_row['GEOID10']:
found = True
break
if not found:
missing.append(d_row['geometry'])
30 missing data points out of 653 and no double counts
temp = com_data.copy()
census_geo = gpd.GeoDataFrame(temp, geometry=temp['geometry'])
census_geo.crs = {'init': 'epsg:4269'}
missing_df = pd.DataFrame(columns=['geometry'], data=missing)
missing_gdf = gpd.GeoDataFrame(missing_df, geometry=missing_df['geometry'])
missing_gdf.crs = {'init': 'epsg:4269'}
map_c = folium.Map(location=[39.29, -76.61], zoom_start=11)
folium.GeoJson(redline_data, name='Redlining Map', style_function=lambda feature: {
'fillColor': feature['properties']['color'],
'color': feature['properties']['color'],
'weight': 1,
'fillOpacity': 0.5,
}).add_to(map_c)
folium.GeoJson(census_geo, name='Census Districts', style_function=lambda feature: {
'fillColor': "#333333",
'color': "#000000",
'weight': 0.5,
'fillOpacity': 0.1
}).add_to(map_c)
folium.GeoJson(missing_gdf, name='Missing Census Districts', style_function=lambda feature: {
'fillColor': "#00ffff",
'color': "#000000",
'weight': 0.5,
'fillOpacity': 0.5
}).add_to(map_c)
folium.LayerControl().add_to(map_c)
map_c
com_data = pd.DataFrame(columns=['GEOID10', 'geometry', 'area', 'perc_green', 'perc_blue', 'perc_yellow', 'perc_red', 'ratio_vacant', 'ratio_foreclosed', 'ratio_sales', 'median_sale_price', 'log_ratio_vacant', 'log_ratio_foreclosed', 'log_ratio_sales', 'log_median_sale_price'])
for index,d_row in data.iterrows():
area = d_row['ALAND10']/2590000.0
new_row = {'GEOID10': d_row['GEOID10'],
'geometry': d_row['geometry'],
'area': area,
'perc_green': d_row['perc_green'],
'perc_blue': d_row['perc_blue'],
'perc_yellow': d_row['perc_yellow'],
'perc_red': d_row['perc_red']}
for index2,h_row in housing_data.iterrows():
if ('24510' + h_row['blockGroup']) == d_row['GEOID10']:
new_row['ratio_vacant'] = h_row['vacantLots']
if new_row['ratio_vacant'] == 0:
new_row['log_ratio_vacant'] = 0
else:
new_row['log_ratio_vacant'] = math.log(new_row['ratio_vacant'])
if h_row['foreclosureFilings'] > 100:
new_row['ratio_foreclosed'] = 100
else:
new_row['ratio_foreclosed'] = h_row['foreclosureFilings']
if new_row['ratio_foreclosed'] == 0:
new_row['log_ratio_foreclosed'] = 0
else:
new_row['log_ratio_foreclosed'] = math.log(new_row['ratio_foreclosed'])
if h_row['unitsPerSquareMile'] == 0:
new_row['ratio_sales'] = 0
else:
new_row['ratio_sales'] = h_row['sales20092010']/h_row['unitsPerSquareMile']*area
if new_row['ratio_sales'] == 0:
new_row['log_ratio_sales'] = 0
else:
new_row['log_ratio_sales'] = math.log(new_row['ratio_sales'])
new_row['median_sale_price'] = h_row['medianSalesPrice20092010']
if new_row['median_sale_price'] == 0:
new_row['log_median_sale_price'] = 0
else:
new_row['log_median_sale_price'] = math.log(new_row['median_sale_price'])
break
com_data = com_data.append(pd.Series(new_row), ignore_index=True)
com_data
com_data = com_data.dropna()
map_d = folium.Map(location=[39.29, -76.61], zoom_start=11)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Ratio of Vacancies',
data=census_geo,
columns=['GEOID10', 'ratio_vacant'],
key_on='feature.properties.GEOID10',
fill_color='Blues',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Ratio of Vacancies',
show=False).add_to(map_d)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Ratio of Foreclosures',
data=census_geo,
columns=['GEOID10', 'ratio_foreclosed'],
key_on='feature.properties.GEOID10',
fill_color='OrRd',
fill_opacity=0.7,
line_opacity=0.7,
legend_name='Ratio of Foreclosures',
show=False).add_to(map_d)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Ratio of Sales',
data=census_geo,
columns=['GEOID10', 'ratio_sales'],
key_on='feature.properties.GEOID10',
fill_color='Purples',
fill_opacity=0.7,
line_opacity=0.7,
legend_name='Ratio of Sales',
show=False).add_to(map_d)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Median Sales Price',
data=census_geo,
columns=['GEOID10', 'median_sale_price'],
key_on='feature.properties.GEOID10',
fill_color='BuGn',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Median Sales Price',
show=False).add_to(map_d)
folium.GeoJson(redline_data, name='Redlining Map', style_function=lambda feature: {
'fillColor': feature['properties']['color'],
'color': feature['properties']['color'],
'weight': 0.7,
'fillOpacity': 0.3,
}).add_to(map_d)
folium.LayerControl().add_to(map_d)
map_d
plt.hist(com_data['ratio_vacant'])
X = sm.add_constant(com_data[['perc_green', 'perc_blue', 'perc_yellow', 'perc_red']])
smmodel_v = sm.OLS(com_data['ratio_vacant'], X)
smfit_v = smmodel_v.fit()
print(smfit_v.summary())
plt.hist(com_data['ratio_foreclosed'])
smmodel_f = sm.OLS(com_data['ratio_foreclosed'], X)
smfit_f = smmodel_f.fit()
print(smfit_f.summary())
plt.hist(com_data['ratio_sales'])
smmodel_s = sm.OLS(com_data['ratio_sales'], X)
smfit_s = smmodel_s.fit()
print(smfit_s.summary())
plt.hist(com_data['median_sale_price'])
smmodel_p = sm.OLS(com_data['median_sale_price'], X)
smfit_p = smmodel_p.fit()
print(smfit_p.summary())
map_dl = folium.Map(location=[39.29, -76.61], zoom_start=11)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Log of Ratio of Vacancies',
data=census_geo,
columns=['GEOID10', 'log_ratio_vacant'],
key_on='feature.properties.GEOID10',
fill_color='Blues',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Log of Ratio of Vacancies',
show=False).add_to(map_dl)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Log of Ratio of Foreclosures',
data=census_geo,
columns=['GEOID10', 'log_ratio_foreclosed'],
key_on='feature.properties.GEOID10',
fill_color='OrRd',
fill_opacity=0.7,
line_opacity=0.7,
legend_name='Log og Ratio of Foreclosures',
show=False).add_to(map_dl)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Log of Ratio of Sales',
data=census_geo,
columns=['GEOID10', 'log_ratio_sales'],
key_on='feature.properties.GEOID10',
fill_color='Purples',
fill_opacity=0.7,
line_opacity=0.7,
legend_name='Log of Ratio of Sales',
show=False).add_to(map_dl)
folium.Choropleth(
geo_data=census_geo[['GEOID10', 'geometry']],
name='Log of Median Sales Price',
data=census_geo,
columns=['GEOID10', 'log_median_sale_price'],
key_on='feature.properties.GEOID10',
fill_color='BuGn',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Log of Median Sales Price',
show=False).add_to(map_dl)
folium.GeoJson(redline_data, name='Redlining Map', style_function=lambda feature: {
'fillColor': feature['properties']['color'],
'color': feature['properties']['color'],
'weight': 0.7,
'fillOpacity': 0.3,
}).add_to(map_dl)
folium.LayerControl().add_to(map_dl)
map_dl
plt.hist(com_data['log_ratio_vacant'])
smmodel_vl = sm.OLS(com_data['log_ratio_vacant'], X)
smfit_vl = smmodel_vl.fit()
print(smfit_vl.summary())
plt.hist(com_data['log_ratio_foreclosed'])
smmodel_fl = sm.OLS(com_data['log_ratio_foreclosed'], X)
smfit_fl = smmodel_fl.fit()
print(smfit_fl.summary())
plt.hist(com_data['log_ratio_sales'])
smmodel_sl = sm.OLS(com_data['log_ratio_sales'], X)
smfit_sl = smmodel_sl.fit()
print(smfit_sl.summary())
plt.hist(com_data['log_median_sale_price'])
smmodel_pl = sm.OLS(com_data['log_median_sale_price'], X)
smfit_pl = smmodel_pl.fit()
print(smfit_pl.summary())
median_zero_count = 0
for index,row in com_data.iterrows():
if row['median_sale_price'] == 0:
median_zero_count += 1
print(median_zero_count)
22 districts have a listed median sale price of 0
data_price = com_data.copy()
data_price['median_sale_price'] = data_price['median_sale_price'].apply(lambda x: x if (x>0) else np.nan)
data_price = data_price.dropna()
price_geo = gpd.GeoDataFrame(data_price, geometry=data_price['geometry'])
price_geo.crs = {'init': 'epsg:4269'}
map_dp = folium.Map(location=[39.29, -76.61], zoom_start=11)
folium.Choropleth(
geo_data=price_geo[['GEOID10', 'geometry']],
name='Median Sales Price',
data=price_geo,
columns=['GEOID10', 'median_sale_price'],
key_on='feature.properties.GEOID10',
fill_color='BuGn',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Median Sales Price',
show=False).add_to(map_dp)
folium.Choropleth(
geo_data=price_geo[['GEOID10', 'geometry']],
name='Log of Median Sales Price',
data=price_geo,
columns=['GEOID10', 'log_median_sale_price'],
key_on='feature.properties.GEOID10',
fill_color='BuGn',
fill_opacity=0.5,
line_opacity=0.7,
legend_name='Log of Median Sales Price',
show=False).add_to(map_dp)
folium.GeoJson(redline_data, name='Redlining Map', style_function=lambda feature: {
'fillColor': feature['properties']['color'],
'color': feature['properties']['color'],
'weight': 0.7,
'fillOpacity': 0.3,
}).add_to(map_dp)
folium.LayerControl().add_to(map_dp)
map_dp
plt.hist(data_price['median_sale_price'])
Xp = sm.add_constant(data_price[['perc_green', 'perc_blue', 'perc_yellow', 'perc_red']])
smmodel_pc = sm.OLS(data_price['median_sale_price'], Xp)
smfit_pc = smmodel_pc.fit()
print(smfit_pc.summary())
plt.hist(data_price['log_median_sale_price'])
smmodel_pcl = sm.OLS(data_price['log_median_sale_price'], Xp)
smfit_pcl = smmodel_pcl.fit()
print(smfit_pcl.summary())